BBS in a Box 3

home *** CD-ROM | disk | FTP | other *** search

/ BBS in a Box 3 / BBS in a box - Trilogy III.iso / Files / Prog / U-Z / VideoToolBox Folder / VideoToolboxSources / Binomial.c < prev next >

Wrap

C/C++ Source or Header | 1993-03-07 | 8.0 KB | 275 lines | [TEXT/KAHL]

/* Binomial.c Copyright (c) 1990,1991,1992 Denis G. Pelli Various routines that deal with Binomial statistics, including generating random samples and computing confidence intervals. This file contains two generations of routines that generate confidence intervals. The original routines BinomialUpperBound and BinomialLowerBound are old, but are more robust. The new routines are based on a Numerical Recipes in C routine to compute the incomplete beta function. The new routines add little to the old routines, and require the Numerical Recipes in C, so I commented them out. The new routines do have the advantage of allowing an arbitrary confidence, whereas the old routines were hard coded to produce a 95% confidence interval. HISTORY: 1/5/91 Added new binomial routines. These routines may be commented out by setting NEW_BINOMIAL to zero. 1/7/91 Rewrote BinomialUpperBound & BinomialLowerBound to allow specification of the desired confidence of the interval. 4/24/92 dgp Added BinomialSampleQuickly(). 4/27/92 dgp Oops. I just noticed that this file was always including nr.h, which you'd only have if you own the Numerical Recipes. I moved the include statement down inside the conditional below, where it belongs. */ #include "VideoToolbox.h" #include <math.h> #define NEW_BINOMIAL 0 long BinomialSample(double p,long n) /* Returns a a sample from a binomial distribution: number of heads in n flips of coin with a probability p of heads on each flip. This is fine if n is small. However, if n is large, e.g. n>100, then this routine will be rather slow, and you may prefer to use the more elaborate Numerical Recipes bnldev() routine. */ { long k,i; k=0; for(i=0;i<n;i++) if(p>UniformSample())k++; return k; } int BinomialSampleQuickly(int n) // Does n coin flips and returns the number of heads. Very fast. { register int i,k=0; register short r; for(i=n;i>=8;i-=8){ r=randU(); // Use only the upper 8 bits, which are reputed to be more random. if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; if(r<0)k++; r<<=1; } if(i>0){ r=randU(); for(;i>0;i--){ if(r<0)k++; r<<=1; } } return k; } /* BinomialUpperBound and BinomialLowerBound return a P confidence interval for the underlying binomial probability assumed to have generated the data. The formula is based on a Gaussian approximation, solving for the p that will put the observed result the right number of standard deviations away from the mean, plus or minus 0.5, as a "continuity correction". This is the best formula I could find in looking through several statistics books. However, the result has to be taken with a grain of salt because it is not possible to produce a binomial confidence interval that will satisfy the strict definition of a confidence interval, namely one that will have the specified probability P of containing the unknown but fixed parameter p. That's because, unlike the Normal distribution, the Binomial distribution is not translation invariant. E.g. if p is actually zero then the true confidence interval ALWAYS contains p. I gave some thought to taking a Bayesian approach, assuming a uniform prior pdf for p and then computing an a posteriori confidence interval. This can be done, but it's hard. I concluded that it's pointless because the uniform pdf assumption is usually unwarranted. Even though this confidence interval is unsatisfactory (i.e. fundamentally false) it is useful in practice since it typically behaves similarly to the Normal case, which is theoretically sound. */ double BinomialLowerBound(double P,long k,long n) /* Arguments are the confidence P, and the number k of heads in n flips of a coin. The returned value p is the lower end of a P confidence interval on the underlying probability of a head on a single trial. */ { double right,s,ss,p; if(k>0 && k<n){ right=k-0.5; s=InverseNormal(sqrt(P)); ss=s*s; p=(right+0.5*ss-s*sqrt(right*(1.0-right/n)+0.25*ss))/(n+ss); return p; } if(k==0) return 0.0; if(k==n) return pow(1.0-P,1.0/n); return sqrt(-1.0); /* domain error */ } double BinomialUpperBound(double P,long k,long n) /* Arguments are the confidence P, and the number k of heads in n flips of a coin. The returned value p is the upper end of a P confidence interval on the underlying probability of a head on a single trial. */ { double right,s,ss,p; if(k>0 && k<n){ right=k+0.5; s=InverseNormal(sqrt(P)); ss=s*s; p=(right+0.5*ss+s*sqrt(right*(1.0-right/n)+0.25*ss))/(n+ss); return p; } if(k==0) return 1.0-pow(1.0-P,1.0/n); if(k==n) return 1.0; return sqrt(-1.0); /* domain error */ } #if NEW_BINOMIAL /* new routines that require Numerical Recipes in C */ #include <nr.h> /* prototype for betai() */ #if 0 void main(void) /* a quick and dirty driver to test some of these routines */ { double p,pUpper,pLower,P,PUpper,PLower; int i,n,k; Require(0); n=10; p=.2; pLower=InverseBinomial(0.05,21,50); printf("%f, Abramowitz & Stegun p. 960 Example 18 say this should be 0.3003.\n",pLower); P=0.95; for(i=0;i<10;i++){ k=BinomialSample( p, n); pLower=BinomialLowerBound(P,k,n); pUpper=BinomialUpperBound(P,k,n); PLower=1.0-Binomial(pLower,k,n); PUpper=Binomial(pUpper,k+1,n); printf("n %4d p %6.4f %6.4f≤%6.4f≤%6.4f %6.4f< <%6.4f\n" ,n,p,pLower,k/(double)n,pUpper,PLower,PUpper); pLower=InverseBinomial(1.0-sqrt(P),k,n); pUpper=InverseBinomial(sqrt(P),k+1,n); PLower=1.0-Binomial(pLower,k,n); PUpper=Binomial(pUpper,k+1,n); printf(" %4d p %6.4f %6.4f≤%6.4f≤%6.4f %6.4f< <%6.4f\n" ,n,p,pLower,k/(double)n,pUpper,PLower,PUpper); } } #endif double Binomial(double p,long k,long n) /* Returns the probability of k or more heads in n flips, where probability of each heads is p. This identity appears in Numerical Recipes in C, page 182, and in Abramowitz and Stegun, p. 945. Eq.26.5.24. */ { if(k>n)return 0.0; if(k<=0L)return 1.0; if(p==0.0){ if(k==0)return 1.0; else return 0.0; } if(p==1.0)return 1.0; return IncompleteBeta(p,k,n-k+1); } double BinomialPdf(double p,long k,long n) /* Returns the probability of exactly k heads in n flips, where probability of each heads is p. I'm not really sure whether this is more computationally efficient than computing it directly, from the definition of the binomial distribution, but it is very easy to write, and avoids the difficulty of computing the binomial coefficient without overflow. */ { if(k<0L || k>n)return 0.0; if(k==n) return pow(p,n); return Binomial(p,k,n)-Binomial(p,k+1,n); } double InverseBinomial(double P,long k,long n) /* Returns the P-th quantile for the probability p of a heads, given k heads in n flips. pUpper=InverseBinomial(0.975,k+1,n) is a 97.5% confidence upper bound on p pLower=InverseBinomial(0.025,k,n) is a 97.5% confidence lower bound on p Taken together these bounds form a 97.5%*97.5%=95% confidence interval: [pLower,pUpper]. */ { #if 0 return InverseIncompleteBeta(P,k,n-k+1); #else /* This simple-minded bisection routine is slow, but its answer is accurate to within ±1e-10. */ double low=0.0,high=1.0,mid; double f; int i; for(i=0;i<30;i++){ mid=(low+high)/2.0; f=Binomial(mid,k,n); if(f>P)high=mid; else low=mid; } return (low+high)/2.0; #endif } double IncompleteBeta(double x,double a,double b) /* The incomplete beta function Ix(a,b). The Numerical Recipes routine assumes a>0. This work around is taken from Abramowitze and Stegun page 944, Eq. 26.5.16. */ { if(a>0.0)return betai(a,b,x); /* Numerical Recipes in C */ else return IncompleteBeta(x,a+1.0,b) +exp(gammln(a+b)-gammln(a+1.0)-gammln(b)+a*log(x)+b*log(1.0-x)); } double InverseIncompleteBeta(double p,double a,double b) /* This simple-minded bisection routine is slow, but its answer is accurate to within ±1e-10. */ { double low=0.0,high=1.0,mid; double f; int i; for(i=0;i<30;i++){ mid=(low+high)/2.0; f=IncompleteBeta(mid,a,b); if(f>p)high=mid; else low=mid; } return (low+high)/2.0; } #endif